George Seif
library(ggplot2)
library(dplyr)
library(MeanShift)
library(plotly)
library(knitr)
# Chunk defaults for the whole document: figure width 9.5, warnings suppressed.
knitr::opts_chunk$set(
  fig.width = 9.5,
  warning = FALSE
)
# Fetch the .RData file over HTTP and load its objects into the workspace
# (RTR2015 is used immediately below). load() does not dispose of a
# connection it was handed, so close it explicitly once loading is done.
address <- url("http://www.trutschnig.net/RTR2015.RData")
load(address)
close(address)

# Work on a random subsample of 500 rows. The seed is fixed so that the
# subsample -- and every clustering result derived from it -- is identical on
# each run; without it the cluster counts change from one run to the next.
set.seed(2015)
df <- RTR2015[sample(nrow(RTR2015), 500), ]

# Quick look at the subsample: position on the axes, download speed mapped to
# marker size.
plot_ly(df, x = ~longitude, y = ~latitude, size = ~rtr_speed_dl)
## No trace type specified:
## Based on info supplied, a 'scatter' trace seems appropriate.
## Read more about this trace type -> https://plot.ly/r/reference/#scatter
## No scatter mode specifed:
## Setting the mode to markers
## Read more about this attribute -> https://plot.ly/r/reference/#scatter-mode
# Keep the three clustering variables and transpose them into a matrix with
# one row per variable and one column per sampled observation.
spatial_df_t <- df[, c("longitude", "latitude", "rtr_speed_dl")] %>%
  as.matrix() %>%
  t()
Standardize each variable by dividing it by its standard deviation, then form a set of candidate bandwidths.
# Divide every row (one variable per row) by that row's standard deviation.
spatial_df_t <- sweep(
  spatial_df_t,
  MARGIN = 1,
  STATS = apply(spatial_df_t, 1, sd),
  FUN = "/"
)

# Candidate bandwidths: the 5%, 10%, ..., 40% quantiles of the pairwise
# distances between the scaled observations.
h.cand <- quantile(
  dist(t(spatial_df_t)),
  probs = seq(0.05, 0.40, by = 0.05)
)
# Run bmsClustering() once for each candidate bandwidth and time the whole
# sweep. The assignment sits inside system.time(), so afterwards
# bms.clustering is a list with one clustering result per element of h.cand.
system.time({
  bms.clustering <- lapply(
    h.cand,
    function(bandwidth) bmsClustering(spatial_df_t, h = bandwidth)
  )
})
##
## Running blurring mean-shift algorithm...
## Blurring mean-shift algorithm ran successfully.
## Finding clusters...
##
## The algorithm found 91 clusters.
##
## Running blurring mean-shift algorithm...
## Blurring mean-shift algorithm ran successfully.
## Finding clusters...
##
## The algorithm found 39 clusters.
##
## Running blurring mean-shift algorithm...
## Blurring mean-shift algorithm ran successfully.
## Finding clusters...
##
## The algorithm found 20 clusters.
##
## Running blurring mean-shift algorithm...
## Blurring mean-shift algorithm ran successfully.
## Finding clusters...
##
## The algorithm found 10 clusters.
##
## Running blurring mean-shift algorithm...
## Blurring mean-shift algorithm ran successfully.
## Finding clusters...
##
## The algorithm found 8 clusters.
##
## Running blurring mean-shift algorithm...
## Blurring mean-shift algorithm ran successfully.
## Finding clusters...
##
## The algorithm found 7 clusters.
##
## Running blurring mean-shift algorithm...
## Blurring mean-shift algorithm ran successfully.
## Finding clusters...
##
## The algorithm found 5 clusters.
##
## Running blurring mean-shift algorithm...
## Blurring mean-shift algorithm ran successfully.
## Finding clusters...
##
## The algorithm found 2 clusters.
## user system elapsed
## 92.62 0.27 97.17
# Scaled variables together with the cluster label obtained at the sixth
# candidate bandwidth.
# NOTE(review): this block uses bms.clustering[[6]] while the later base and
# 3D plots use bms.clustering[[5]] -- confirm the difference is intended.
resulting_df_s <- spatial_df_t %>%
  t() %>%
  as.data.frame() %>%
  select(longitude, latitude, rtr_speed_dl) %>%
  mutate(cluster = as.factor(bms.clustering[[6]]$labels))

# Same labels attached to the unscaled sample, plus the download speed divided
# by its standard deviation (used for marker sizing on the map).
resulting_df <- df %>%
  select(longitude, latitude, rtr_speed_dl) %>%
  mutate(
    cluster = as.factor(bms.clustering[[6]]$labels),
    rtr_speed_dl_s = rtr_speed_dl / sd(rtr_speed_dl)
  )

# Show the labelled data.
resulting_df
# Geo-axis options handed to layout(geo = ...) for the map below.
g <- list(
  scope        = "europe",
  projection   = list(type = "natural earth"),
  showland     = TRUE,
  countrywidth = 1,
  subunitwidth = 1
)
# Map of the sampled measurements: marker size follows the scaled download
# speed, marker colour follows the cluster, and the hover box shows speed and
# coordinates on separate lines.
speed_map <- plot_geo(resulting_df, lon = ~longitude, lat = ~latitude)

speed_map <- add_markers(
  speed_map,
  size = ~rtr_speed_dl_s,
  color = ~cluster,
  text = ~paste(
    paste("Download Speed: ", rtr_speed_dl),
    paste("Long: ", longitude),
    paste("Lat: ", latitude),
    sep = "<br />"
  ),
  hoverinfo = "text"
)

# NOTE(review): a mapbox style is set on a plot_geo() figure; it presumably has
# no effect without mapbox traces -- confirm before relying on it.
layout(
  speed_map,
  title = "Mobile download speed in Austria",
  geo = g,
  mapbox = list(style = "satellite-streets")
)
# Base-graphics view of the clustering obtained at the fifth candidate
# bandwidth. Rows 1, 2 and 3 of spatial_df_t are the scaled longitude,
# latitude and download speed respectively.
# NOTE(review): the map above is coloured with bms.clustering[[6]]; this plot
# uses [[5]] -- confirm the difference is intended.
fit <- bms.clustering[[5]]

plot(
  spatial_df_t[1, ], spatial_df_t[2, ],
  col = fit$labels,
  xlab = "longitude", ylab = "latitude", main = "Mean shift labels",
  cex = spatial_df_t[3, ],  # point size follows the scaled download speed
  pch = 16
)

# Overlay one "+" per cluster component, coloured by its column index so that
# it matches the points carrying the same label. seq_len() is used instead of
# 1:ncol() so a zero-column matrix yields an empty colour vector, not c(1, 0).
points(
  fit$components[1, ], fit$components[2, ],
  col = seq_len(ncol(fit$components)),
  pch = "+", cex = 3
)
# 3D scatter of the scaled variables (longitude, latitude, download speed),
# coloured by the cluster found at the fifth candidate bandwidth.
# Cluster labels are nominal identifiers, so they are converted to a factor:
# passed as plain numbers they would be drawn on a continuous colour scale.
# This also matches how the labels are handled for the map (as.factor()).
p <- spatial_df_t %>%
  t() %>%
  as.data.frame() %>%
  plot_ly(x = ~longitude, y = ~latitude, z = ~rtr_speed_dl) %>%
  add_markers(color = as.factor(bms.clustering[[5]]$labels))

# Render the figure.
p